{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import bayes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 准备数据：从文本中构建词向量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['how', 'cute', 'love', 'take', 'has', 'flea', 'so', 'ate', 'maybe', 'him', 'help', 'food', 'my', 'garbage', 'licks', 'is', 'not', 'buying', 'park', 'I', 'dalmation', 'quit', 'please', 'problems', 'stupid', 'posting', 'worthless', 'mr', 'steak', 'dog', 'to', 'stop']\n",
      "[0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0]\n"
     ]
    }
   ],
   "source": [
    "listOPosts, listClasses = bayes.loadDataSet()\n",
    "myVocabList = bayes.createVocabList(listOPosts)\n",
    "print(myVocabList)\n",
    "testVector = bayes.setOfWords2Vec(myVocabList, listOPosts[0])\n",
    "print(testVector)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 训练算法：从词向量计算概率"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# $p(c_i|w) = \\frac{p(w|c_i)p(c_i)}{p(w)}$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0]]\n"
     ]
    }
   ],
   "source": [
    "trainMat = []\n",
    "for postinDoc in listOPosts:\n",
    "    trainMat.append(bayes.setOfWords2Vec(myVocabList, postinDoc))\n",
    "print(trainMat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0, 1, 0, 1, 0, 1]\n"
     ]
    }
   ],
   "source": [
    "print(listClasses)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 0.  0.  0.  0.  0.  0.  1.  1.  1.  1.  0.  0.  1.  0.  0.  1.  0.  0.\n",
      "  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  1.  0.  0.]\n",
      "8.0\n",
      "[ 0.  1.  1.  1.  0.  0.  1.  1.  1.  1.  0.  0.  1.  0.  1.  2.  0.  0.\n",
      "  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  1.  0.  0.]\n",
      "13.0\n",
      "[ 0.  1.  1.  2.  0.  0.  1.  2.  1.  1.  0.  0.  1.  0.  1.  3.  0.  0.\n",
      "  1.  0.  0.  0.  1.  0.  0.  0.  1.  1.  0.  1.  0.  0.]\n",
      "19.0\n"
     ]
    }
   ],
   "source": [
    "p0V, p1V, pAb = bayes.trainNB0(trainMat, listClasses)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 0.125       0.          0.04166667  0.04166667  0.04166667  0.\n",
      "  0.04166667  0.04166667  0.04166667  0.04166667  0.04166667  0.04166667\n",
      "  0.04166667  0.04166667  0.04166667  0.          0.04166667  0.04166667\n",
      "  0.04166667  0.          0.04166667  0.          0.04166667  0.08333333\n",
      "  0.          0.          0.04166667  0.          0.04166667  0.          0.\n",
      "  0.        ]\n"
     ]
    }
   ],
   "source": [
    "print(p0V)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 0.          0.05263158  0.          0.          0.          0.05263158\n",
      "  0.05263158  0.          0.          0.          0.          0.          0.\n",
      "  0.10526316  0.05263158  0.15789474  0.          0.          0.\n",
      "  0.05263158  0.          0.05263158  0.          0.05263158  0.05263158\n",
      "  0.05263158  0.          0.05263158  0.          0.05263158  0.05263158\n",
      "  0.10526316]\n"
     ]
    }
   ],
   "source": [
    "print(p1V)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 测试算法：根据现实情况修改分类器"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\anaconda\\lib\\site-packages\\ipykernel_launcher.py:7: RuntimeWarning: divide by zero encountered in log10\n",
      "  import sys\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[<matplotlib.lines.Line2D at 0xd416f49320>]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAD8CAYAAAB6paOMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl8VPW9//HXJwuENSxJWBJC2CFsAkFE3BCQiq1Uq9bW\nturVS6169d6utvbe/m57e3/c9ndt66/31xa11t7WqrW1Yt2aqLhQkEX2hDUEEhKykB2SkGS+vz9m\n6qU0IQOTzJnMvJ+PBw9nJt/M9+OX4bznfM8532POOUREJHbFeV2AiIh4S0EgIhLjFAQiIjFOQSAi\nEuMUBCIiMU5BICIS4xQEIiIxTkEgIhLjFAQiIjEuwesCziUlJcVlZWV5XYaISK+xdevWKudc6vn8\nTkQHQVZWFlu2bPG6DBGRXsPMjpzv72hqSEQkxikIRERinIJARCTGKQhERGKcgkBEJMYpCEREYpyC\nQEQkxkX0dQQiIrHidJuP/eUN7DpWR+2pVr5w1YSw9a0gEBEJszM3+ruO1bH7WB17yxo43e4DIG1Q\nXz5/xXji4iws9SgIRER6UFcb/UFJCcxMT+bORVnMSE9mZnoyY4f3xyw8IQAKAhGRbuPzOQ5VNrKj\npI4dxbXsLKmlIMI2+h1REIiIXADnHKV1zewormVHSS07imvZfayexpY2AAb2jcyNfkcUBCIiQag5\neTqwwa9jZ4l/41/VeBqAxHgje9RgbpybzqyMIVw0JpnxKQPDNscfKgWBiMhZWtt97C1rYFtxDduO\n1rLtaA1FJ04BYAYTUgdy5eQ0Zo9JZnbGEKaOGkTfhHiPq75wCgIRiXnH65rZdrSGbcX+jf7Okjpa\n2vzz+ikD+zI3cwi3zB/DRWOGMDM9mUFJiR5X3L0UBCISU5pb29l9rM7/TT/wjb+srhmAPvFxTE8f\nzG0LxjIncwhzMoeQPqRfRM7rd6eQg8DMhgHPAllAEXCLc66mk7aDgQLgBefc/aH2LSLSlcqGFrYe\nqWHrkWq2HKlh97E6WtsdAGOG9WN+1rDARn8o03r5FM+F6o49goeAN5xzq83socDzr3XS9jvA293Q\np4jI3/D5HAcrG9lSVMOWI9VsPVLDkcDcfp/4OGZlJPN3i8Yxd+xQ5mYOJXVQX48rjgzdEQQrgasC\nj58C1tFBEJjZPGAE8BqQ0w39ikiMazrdzvbi2g+/7X9wpIb6Zv/pm8MH9GHe2KHctiCTeWOHMiM9\nOSa/7QejO4JghHOuDMA5V2ZmaWc3MLM44D+BzwJLuqFPEYlBdU2tbD1SzabDNWw6fIJdZ0zzTB4x\nkOtmjWLe2GHkjB0asefsR6KggsDM8oCRHfzo4SD7uRd4xTlX3NVfjJmtAlYBZGZmBvn2IhKNKhta\n2FxUzabD/j8Fx+txzn/e/qyMIdx9+XguzhrG3MyhJPePrjN5wimoIHDOLe3sZ2ZWbmajAnsDo4CK\nDpotBC43s3uBgUAfM2t0zj3UQV9rgDUAOTk5Lpj6RCQ6lNSc+nCjv6momsLKkwD0S4xn7tgh/OOS\nyVw8zn9wNylR0zzdpTumhtYCtwOrA/998ewGzrnb/vLYzO4AcjoKARGJLcdqm9hw6AQbDp1gY+EJ\njtU2ATA4KYH5WcP4ZM4YLh43jBnpySTG6/YpPaU7gmA18JyZ3QUcBW4GMLMc4B7n3N3d0IeIRIHy\n+uYPN/wbCk9wtNp/Rs+Q/olcMm44f3/5OBaMH86UEYN6zfIM0cCci9zZl5ycHLdlyxavyxCRC1TZ\n0MLGQv9Gf+OhExRW+ad6BiUlsGDccBZOGM7C8cOZOlIb/u5iZludc+d1ZqauLBaRbtPQ3MqGQydY\nf7CKPx86wYGKRsC/Euf8rKHcevEYFo5PIXv0YOK14Y8YCgIRuWCt7T52FNfy7oEq3jtYxfbiWtp9\njqTEOOZnDeOGueksHD+cmenJJGiOP2
IpCEQkaM45DlWe5L0Dlbx3sIqNhdU0trRhBrPSk7nnyvFc\nNjGVuWOH6OKtXkRBICLnVNXYwvqDVbx7oIr1B6s+XKAtc1h/rr9oNJdPTGHhhOEM6d/H40rlQikI\nROSvtLX72F5cy7p9lazbX8HuY/UAJPdLZNHE4fzDxFQum5hC5vD+Hlcq3UVBICJUNDTz9r5K1u2v\n5N39ldQ3txEfZ8zNHMKXr5nMFZNTmT46WQd4o5SCQCQGtbX7+OBoLev2VbBuXyX5Zf5v/WmD+rJ8\n+kiumpLGZZNSSO6nZRtigYJAJEZUNDSzbq9/uufdA1U0BL71z8scyleWT+GqKalkjxqshdpikIJA\nJEo559hX3kBefjl5BRVsL64F/N/6r53h/9a/aKK+9YuCQCSqnG7zselwNXkF5eQVlFNS41+7Z3ZG\nMl9cNpkl09L0rV/+hoJApJerPXWadfsqySso5+19lTS0tNE3IY7LJqZw3+KJXD01jRGDk7wuUyKY\ngkCkFzp64hR/yj9OXkE5m4tqaPc5Ugb2ZcXMUSzNHsFlE1Po10cXdElwFAQivcSB8gZe3X2cV3cf\npyBwls/UkYP4wpUTWDItjdkZQ7Rwm1wQBYFIhHLOkV9Wz2uBjf/BwAJu88YO5ZvXTWP59JGMGaaL\nuiR0CgKRCOKcY3tx7Ycb/6PVp4gzWDBuOJ9bOJbl00dqvl+6nYJAxGPtPseWompe3X2c1/ccp6yu\nmcR449IJKdx71QSWZY9g+MC+XpcpUUxBIOIB5xzbimtZu72Ul3eVUdnQQp+EOK6cnMpXlk9hybQR\nOr9fwkZBIBJGe4/Xs3Z7KS/tLKW4uok+CXFcPSWN62aNYvHUNAb21T9JCT996kR62NETp3hpZykv\nbj/G/vJG4uOMRRNTeHDJZK6ZPoLBSfrmL95SEIj0gIr6Zv64s4y1O0o/XNohZ+xQvrNyOtfOHEWK\n5vwlgoQUBGY2DHgWyAKKgFucczUdtMsEHgfGAA5Y4ZwrCqVvkUhT39zKq7vKeHF7KRsLT+BzkD1q\nMA9dO5WPzhpFxlCd6imRKdQ9goeAN5xzq83socDzr3XQ7pfAd51zuWY2EPCF2K9IRPD5HH8+dILn\ntxbz2p7jNLf6yBren/uvnsT1s0cxMW2Q1yWKdCnUIFgJXBV4/BSwjrOCwMyygQTnXC6Ac64xxD5F\nPFdUdZLffVDC77aWUFrXzOCkBG6al8En5mZw0ZghWtRNepVQg2CEc64MwDlXZmZpHbSZDNSa2e+B\ncUAe8JBzrj3EvkXCqrGljVd2lvH81hI2FVUTZ3D5pFS+vmIay7JHkJSotX2kd+oyCMwsDxjZwY8e\nPo8+LgfmAEfxH1O4A3iik/5WAasAMjMzg+xCpGf4fI6Nh0/w/NYSXt11nKbWdsanDuCrH5nCjXMy\nGJmsq3yl9+syCJxzSzv7mZmVm9mowN7AKKCig2YlwDbnXGHgd/4AXEInQeCcWwOsAcjJyXFd/y+I\ndL9jtU08t7mY331QQklNE4P6JvDxOencNC+DuZma+pHoEurU0FrgdmB14L8vdtBmMzDUzFKdc5XA\n1cCWEPsV6XbtPsc7Byr59cYjvLm3AgdcNjGFryyfwvLpIzX1I1Er1CBYDTxnZnfhn/a5GcDMcoB7\nnHN3O+fazezLwBvm/xq1FXgsxH5Fuk1VYwvPbSnm6fePUlLTRMrAvtx71URuvXiMTvmUmBBSEDjn\nTgBLOnh9C3D3Gc9zgVmh9CXSnZxzbDpcza/eP8pru8tobXcsHD+ch66dyjXZI+mTEOd1iSJhoyuL\nJabUNbXywgcl/Pr9oxyoaGRwUgKfuWQsty0Yy8S0gV6XJ+IJBYHEhF0ldfxq4xHW7iilqbWd2RnJ\nfO+mWXxs1mjd0lFinoJAolZbu4/X95Tz+HuFbDtaS7/EeFZeNJrbFoxlZkay1+WJRAwFgUSdxpY2\nnt
tczM/XH6akpomxw/vzrY9lc+PcDK3xL9IBBYFEjbK6Jn6xvoinNx2lobmNnLFD+eZ12SzLHkG8\nbuou0ikFgfR6u4/V8di7hby8swyfc1w7cxR3XzaOOZlDvS5NpFdQEEiv5PM53tpXwWPvFrKxsJoB\nfeK5/dIs7rg0izHDdO6/yPlQEEiv0tzazu8+KOGJ9w5TWHmSUclJfGPFVG69OFN3+hK5QAoC6RVO\nnW7jVxuPsOadQqoaTzMzPZkf3XoRK2aOIjFeF3+JhEJBIBHtZEsbv9xwhMfeLaT65Gkun5TCfYsn\nsmDcMC38JtJNFAQSkRpb2njqz0U8/m4hNadauWJyKg8umcS8sToALNLdFAQSUeqbW3lqfRFPrD9M\n7alWFk9J5YElk3QGkEgPUhBIRKhrauUX64t44r1C6pvbWDI1jQeWTGL2mCFelyYS9RQE4qm6U608\nsf4wT64/TENzG8uyR/DgkknMSNcSECLhoiAQTzQ0t/LYO4U8ub6IhpY2lk8fwQNLJjF9tAJAJNwU\nBBJWre0+ntl0lB/mHeDEydNcO2MkDyyZxLRRg70uTSRmKQgkLJxz5OaXs/rVvRRWneSS8cN4csU0\nZmXoGICI1xQE0uN2FNfy3VcK2HS4mgmpA3j8czksmZam6wBEIoSCQHpMcfUpvv/6PtbuKGX4gD78\n28dncOv8MSToSmCRiKIgkG5X19TK/3vrIE+uLyIuDu5fPJHPXzmeQVoLSCQihRwEZjYMeBbIAoqA\nW5xzNR20+x5wHRAH5AIPOudcqP1L5Djd5uNXG4/w6JsHqGtq5RNzM/jSNZMZldzP69JE5By6Y4/g\nIeAN59xqM3so8PxrZzYws0uBRcCswEvvAVcC67qhf/GYc47Xdh9n9Wt7OXLiFIsmDucbK6bpVFCR\nXqI7gmAlcFXg8VP4N+5fO6uNA5KAPoABiUB5N/QtHiuqOsk3/7Cb9w5WMXnEQJ68cz5XTU7VgWCR\nXqQ7gmCEc64MwDlXZmZpZzdwzm0ws7eAMvxB8GPnXEFHb2Zmq4BVAJmZmd1QnvSE020+1rxziEff\nPEjf+Di+vXI6n744UweCRXqhoILAzPKAkR386OEgf38iMA3ICLyUa2ZXOOfeObutc24NsAYgJydH\nxxAi0Jaiar7xwi72lzdy7YyR/K/rpzNicJLXZYnIBQoqCJxzSzv7mZmVm9mowN7AKKCig2Y3ABud\nc42B33kVuAT4myCQyFXX1Mp/vLaXp98/yujkJB7/XA5Ls0d4XZaIhKg79uPXArcHHt8OvNhBm6PA\nlWaWYGaJ+A8Udzg1JJHHOcdLO0pZ8p9v88ymo9x12Thyv3ilQkAkSnTHMYLVwHNmdhf+Df7NAGaW\nA9zjnLsbeB64GtiF/8Dxa865l7qhb+lhxdWn+OcXd7NuXyUz05P5xZ3ztTKoSJQJOQiccyeAJR28\nvgW4O/C4Hfh8qH1J+LS1+/j5+sP8IPcAZvDPH83m9oVjdTBYJArpymL5G9uLa/n673dRUFbP0mkj\n+PbK6YweoovCRKKVgkA+dLrNx3/+aR9r3i0kbVBffvqZeSyfPkLXBIhEOQWBAHC46iQP/GYbu47V\n8ekFmXz92qlaG0gkRigIYpxzjt99cIx/eXE3fRLi+Nln57F8ekeXjIhItFIQxLD65la++cJu1u4o\nZcG4Yfzw1ou0QJxIDFIQxKgPjtbw4DPbKK1t5kvLJnPv4onEx+lYgEgsUhDEmHaf46dvH+KR3P2M\nHJzEc59fyLyxQ70uS0Q8pCCIIcfrmvmnZ7ezofAEH501iu/eMJPkfjogLBLrFAQxIi+/nK88v4Pm\nVh/f+8Qsbs7J0GmhIgIoCKJec2s7//5KAb/ccITpowfz6KfmMCF1oNdliUgEURBEsQPlDfzDb7ax\n93gDd102jq9+ZAp9E+K9LktEIoyCIEq9ta+C+3/9AUmJ8Tx553wW
T/mb+wWJiAAKgqj03xuK+Nba\nPUwbNZgnbp/PyGTdNEZEOqcgiCLtPsd3Xy7g5+sPs3RaGj+6dQ4D+uqvWETOTVuJKHHqdBsP/GY7\neQXl3Lkoi29el60LxEQkKAqCKFBe38xdT20mv7Sef71+OrdfmuV1SSLSiygIern80nruemoz9U2t\nPH57DldP1e0jReT8KAh6sb+cGTQoKZHf3nMp2aMHe12SiPRCCoJeSmcGiUh3CekGtGZ2s5ntMTNf\n4Gb1nbX7iJntM7ODZvZQKH3Gunaf49sv5fPPL+7h6qlpPPf5hQoBEQlJqHsEu4EbgZ911sDM4oH/\nApYBJcBmM1vrnMsPse+YozODRKQnhBQEzrkCoKvFyy4GDjrnCgNtnwFWAgqC83DmmUHfXjmdzy3M\n8rokEYkS4ThGkA4Un/G8BFgQhn6jRnH1KT75sw3UNbXyxO3zWTxVy0WISPfpMgjMLA/o6Ca2Dzvn\nXgyij452F9w5+lsFrALIzMwM4u2j2/G6Zj79+EZOnm7n2c8vZEZ6stcliUiU6TIInHNLQ+yjBBhz\nxvMMoPQc/a0B1gDk5OR0GhixoKqxhdse30jNyVZ+ffcChYCI9IiQzhoK0mZgkpmNM7M+wK3A2jD0\n26vVnWrls09s4lhtEz+/Yz6zxwzxuiQRiVKhnj56g5mVAAuBl83s9cDro83sFQDnXBtwP/A6UAA8\n55zbE1rZ0a2xpY3bn9zEoYpG1nw2h4vHDfO6JBGJYqGeNfQC8EIHr5cCK854/grwSih9xYqm0+38\n3S82s+tYHT+5bS5XTE71uiQRiXLhmBqSILW0tXPPr7ayuaiaR26ZzTXTOzpGLyLSvRQEEaKt3ccD\nv9nG2/srWX3jTFZelO51SSISIxQEEcDnc3z5tzt4fU853/pYNp+cr9NmRSR8FAQec87x8B9284ft\npXxl+RTuXDTO65JEJMYoCDzknOPfXi7gN5uOct/iCdy3eKLXJYlIDFIQeOgHeQd44r3D3HFpFl++\nZorX5YhIjFIQeOSnbx/i0TcO8MmcMfzLR7O7WrhPRKTHKAg88MsNRax+dS8fmz2af79xJnFaSlpE\nPKQgCLPc/HL+5cU9LMsewSO3zNb9BETEcwqCMDpW28SXf7uDGemD+b+fmkNivIZfRLynLVGYtAYu\nGGv3OX78qbkkJcZ7XZKICKCb14fND3L3s/VIDY9+ag5ZKQO8LkdE5EPaIwiDdw9U8pO3D3Hr/DFc\nP3u01+WIiPwVBUEPq2ho5p+e3c6ktIF862PTvS5HRORvaGqoB7X7HP/07HYaW9p4+u8voV8fHRcQ\nkcijIOhBP1l3kPUHT/Afn5jJ5BGDvC5HRKRDmhrqIZsOV/NI7n6unz2aW3LGdP0LIiIeURD0gJqT\np3nwmW2MGdaf794wQ8tHiEhE09RQN3POf2+BqsYWfv+FRQxKSvS6JBGRc9IeQTf7+foi3thbwTdW\nTGNmRrLX5YiIdCmkIDCzm81sj5n5zCynkzZjzOwtMysItH0wlD4j2c6SWla/WsCy7BHccWmW1+WI\niAQl1D2C3cCNwDvnaNMGfMk5Nw24BLjPzLJD7Dfi1De3cv/T20gd2Jfv3zRLxwVEpNcI6RiBc64A\nOOdGzzlXBpQFHjeYWQGQDuSH0nckcc7x9d/v4lhtE8+uuoQh/ft4XZKISNDCeozAzLKAOcD752iz\nysy2mNmWysrKcJUWkt9sKublnWV8cdlkcrKGeV2OiMh56XKPwMzygJEd/Ohh59yLwXZkZgOB3wH/\n6Jyr76ydc24NsAYgJyfHBfv+Xtl7vJ5/fWkPl09K4QtXTvC6HBGR89ZlEDjnlobaiZkl4g+BXzvn\nfh/q+0WKU6fbuP/pbQxKSuSRWy7SncZEpFfq8esIzH8A4QmgwDn3SE/3F04/zDvAocpG/vvvFpA6\nqK/X5YiIXJBQTx+9wcxKgIXA
y2b2euD10Wb2SqDZIuCzwNVmtj3wZ0VIVUeAsromfvHnIm6ck8Fl\nk1K8LkdE5IKFetbQC8ALHbxeCqwIPH4PiLo5k0ffOIhzjn9cOsnrUkREQqIriy9AUdVJnttSzKcu\nzmTMsP5elyMiEhIFwQV4JHc/ifHG/VdP9LoUEZGQKQjOU35pPWt3lHLnonGkDUryuhwRkZApCM7T\nI7n7GJSUwD1X6JoBEYkOCoLzsPVIDXkFFdxz5QSS+2t5aRGJDgqCIDnn+P7re0kZ2Ecri4pIVFEQ\nBOm9g1VsLKzmvsUTGdBX9/MRkeihIAiCf29gH+lD+vHpBZlelyMi0q0UBEF4fc9xdpbU8eDSSfRN\niPe6HBGRbqUg6EK7z/F//rSfCakDuHFOutfliIh0OwVBF/6w7RgHKxr50jVTSIjXcIlI9NGW7RxO\nt/n4Qd5+ZqQP5iPTO7olg4hI76cgOIdnNh+lpKaJryyfqnsNiEjUUhB04tTpNh594yAXjxvGFVpm\nWkSimIKgE7/4cxFVjS18dfkU/PfWERGJTgqCDtQ1tfLTdYe4emqabkYvIlFPQdCBx94ppL65jS9d\nM9nrUkREepyC4CyVDS38fP1hPjprFNNHJ3tdjohIj1MQnOW/3jpIS5uPLy7T3oCIxIZQb15/s5nt\nMTOfmeV00TbezLaZ2R9D6bMnldSc4un3j3LzvAzGpw70uhwRkbAIdY9gN3Aj8E4QbR8ECkLsr0c9\n+sYBMHhgiW5ILyKxI6QgcM4VOOf2ddXOzDKA64DHQ+mvJx2saOT5rSV89pKxjB7Sz+tyRETCJlzH\nCH4IfBXwham/8/bjNw/QLzGee6/SLShFJLZ0eYcVM8sDOlpo52Hn3ItB/P5HgQrn3FYzuyqI9quA\nVQCZmeFZ+7+lrZ3c/HKuvyid4QP7hqVPEZFI0WUQOOeWhtjHIuB6M1sBJAGDzexXzrnPdNLfGmAN\nQE5Ojgux76BsOHSCk6fbuSZ7RDi6ExGJKD0+NeSc+7pzLsM5lwXcCrzZWQh4JTe/nP594lk4YbjX\npYiIhF2op4/eYGYlwELgZTN7PfD6aDN7pTsK7Gk+nyOvoJwrJqWSlKi7j4lI7AnpLuzOuReAFzp4\nvRRY0cHr64B1ofTZ3XaX1lFe38IyTQuJSIyK+SuLc/PLiTNYPDXN61JERDyhIMgvJydrGMMG9PG6\nFBERT8R0EBRXn2Lv8QadLSQiMS2mgyCvoByApdMUBCISu2I6CHLzy5mUNpCslAFelyIi4pmYDYK6\nU628f7iapZoWEpEYF7NBsG5/Be0+p9NGRSTmxWwQ/Cm/nJSBfbkoY4jXpYiIeComg+B0m4+391Wy\ndFoacXHmdTkiIp6KySDYWHiCxpY2TQuJiBCjQZCbX06/xHgWTUzxuhQREc/FXBA4519k7vJJKVpk\nTkSEGAyCPaX1lNU167RREZGAmAuCvywyt0SLzImIADEaBPPGDtUtKUVEAmIqCI7VNpFfVq+1hURE\nzhBTQZCX719kTqeNioj8j5gKgtz8csanDmB86kCvSxERiRgxEwT1za1sLDyhvQERkbPETBCs21dJ\nm8+xTMcHRET+SkhBYGY3m9keM/OZWc452g0xs+fNbK+ZFZjZwlD6vRB5+eUMH9CHOZlDw921iEhE\nC3WPYDdwI/BOF+1+BLzmnJsKzAYKQuz3vLS2+3hrXwVLpqURr0XmRET+SkIov+ycKwAw63zjamaD\ngSuAOwK/cxo4HUq/52vT4Woamtt02qiISAfCcYxgPFAJPGlm28zscTML670hc/PL6ZsQx+WTUsPZ\nrYhIr9BlEJhZnpnt7uDPyiD7SADmAj9xzs0BTgIPnaO/VWa2xcy2VFZWBtlF55xz5Ob7F5nr10eL\nzImInK3LqSHn3NIQ+ygBSpxz7weeP885gsA5twZYA5CTk+NC7JuCsgaO1TbxwJKJob6ViEhU6v
Gp\nIefccaDYzKYEXloC5Pd0v3+Rm1+OGVw9VccHREQ6EurpozeYWQmwEHjZzF4PvD7azF45o+k/AL82\ns53ARcC/h9Lv+cgrKGfOmCGkDtIicyIiHQn1rKEXgBc6eL0UWHHG8+1Ap9cZ9JSyuiZ2Havjax+Z\nGu6uRUR6jai+sjivoAKAZdm694CISGeiOghy88sZlzKACVpkTkSkU1EbBA3NrWw4VMXSaWnnvOBN\nRCTWRW0QvLO/itZ2x7LskV6XIiIS0aI2CHLzjzO0fyLzxmqRORGRc4nKIGht9/Hm3gqunjpCi8yJ\niHQhKoNgc1E19c1tugmNiEgQojII8vIr6JMQx+WTUrwuRUQk4kVdEDjnyC04zmUTUxjQN6Tr5URE\nYkLUbSmbW31cOj6FRdobEBEJStQFQb8+8fzHTbO8LkNEpNeIuqkhERE5PwoCEZEYpyAQEYlxCgIR\nkRinIBARiXEKAhGRGKcgEBGJcQoCEZEYZ845r2volJlVAkeAFKDK43IigcbBT+Pgp3Hw0zj4/WUc\nxjrnUs/nFyM6CP7CzLY453K8rsNrGgc/jYOfxsFP4+AXyjhoakhEJMYpCEREYlxvCYI1XhcQITQO\nfhoHP42Dn8bB74LHoVccIxARkZ7TW/YIRESkh0RMEJjZR8xsn5kdNLOHOvh5XzN7NvDz980sK/xV\n9rwgxuEKM/vAzNrM7CYvagyXIMbii2aWb2Y7zewNMxvrRZ09LYhxuMfMdpnZdjN7z8yyvaizp3U1\nDme0u8nMnJlF5ZlEQXwe7jCzysDnYbuZ3d3lmzrnPP8DxAOHgPFAH2AHkH1Wm3uBnwYe3wo863Xd\nHo1DFjAL+CVwk9c1ezwWi4H+gcdfiOHPxOAzHl8PvOZ13V6MQ6DdIOAdYCOQ43XdHn0e7gB+fD7v\nGyl7BBcDB51zhc6508AzwMqz2qwEngo8fh5YYmYWxhrDoctxcM4VOed2Aj4vCgyjYMbiLefcqcDT\njUBGmGsMh2DGof6MpwOAaDzwF8w2AuA7wPeA5nAWF0bBjsN5iZQgSAeKz3heEnitwzbOuTagDhge\nlurCJ5hxiBXnOxZ3Aa/2aEXeCGoczOw+MzuEfyP4QJhqC6cux8HM5gBjnHN/DGdhYRbsv4tPBKZM\nnzezMV29aaQEQUff7M/+VhNMm94uFv4fgxX0WJjZZ4Ac4Ps9WpE3ghoH59x/OecmAF8DvtnjVYXf\nOcfBzOKAHwBfCltF3gjm8/ASkOWcmwXk8T8zKZ2KlCAoAc5MrQygtLM2ZpYAJAPVYakufIIZh1gR\n1FiY2VI0aOHtAAABPElEQVTgYeB651xLmGoLp/P9TDwDfLxHK/JGV+MwCJgBrDOzIuASYG0UHjDu\n8vPgnDtxxr+Fx4B5Xb1ppATBZmCSmY0zsz74DwavPavNWuD2wOObgDdd4MhIFAlmHGJFl2MRmAr4\nGf4QqPCgxnAIZhwmnfH0OuBAGOsLl3OOg3OuzjmX4pzLcs5l4T9mdL1zbos35faYYD4Po854ej1Q\n0OW7en0U/Iwj3SuA/fiPiD8ceO3b+P8yAZKA3wIHgU3AeK9r9mgc5uP/VnASOAHs8bpmD8ciDygH\ntgf+rPW6Zo/G4UfAnsAYvAVM97pmL8bhrLbriMKzhoL8PPzvwOdhR+DzMLWr99SVxSIiMS5SpoZE\nRMQjCgIRkRinIBARiXEKAhGRGKcgEBGJcQoCEZEYpyAQEYlxCgIRkRj3/wFKbKIyoSPgxwAAAABJ\nRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0xd416bb4390>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%matplotlib inline\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from pylab import *\n",
    "\n",
    "x = np.arange(0.0, 0.5, 0.02)\n",
    "y1 = np.log10(x)\n",
    "\n",
    "plt.figure()\n",
    "plt.subplot(111)\n",
    "plt.plot(x, y1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['love', 'my', 'dalmation'] classified as:  0\n",
      "['stupid', 'garbage'] classified as:  1\n"
     ]
    }
   ],
   "source": [
    "bayes.testingNB()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]\n"
     ]
    }
   ],
   "source": [
    "testEntry = ['love', 'my', 'dalmation']\n",
    "thisDoc = array(bayes.setOfWords2Vec(myVocabList, testEntry))\n",
    "print(thisDoc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\anaconda\\lib\\re.py:212: FutureWarning: split() requires a non-empty pattern match.\n",
      "  return _compile(pattern, flags).split(string, maxsplit)\n"
     ]
    },
    {
     "ename": "UnicodeDecodeError",
     "evalue": "'utf-8' codec can't decode byte 0x92 in position 884: invalid start byte",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mUnicodeDecodeError\u001b[0m                        Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-2-cd56b8e8917e>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mbayes\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mspamTest\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m~\\机器学习实战\\ch4\\bayes.py\u001b[0m in \u001b[0;36mspamTest\u001b[1;34m()\u001b[0m\n\u001b[0;32m    132\u001b[0m         \u001b[0mfullText\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mwordList\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    133\u001b[0m         \u001b[0mclassList\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 134\u001b[1;33m         \u001b[0mwordList\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtextParse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'email/ham/%d.txt'\u001b[0m \u001b[1;33m%\u001b[0m \u001b[0mi\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mencoding\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'utf-8'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    135\u001b[0m         \u001b[0mdocList\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mwordList\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    136\u001b[0m         \u001b[0mfullText\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mwordList\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\anaconda\\lib\\codecs.py\u001b[0m in \u001b[0;36mdecode\u001b[1;34m(self, input, final)\u001b[0m\n\u001b[0;32m    319\u001b[0m         \u001b[1;31m# decode input (taking the buffer into account)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    320\u001b[0m         \u001b[0mdata\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbuffer\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0minput\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 321\u001b[1;33m         \u001b[1;33m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mconsumed\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_buffer_decode\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merrors\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfinal\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    322\u001b[0m         \u001b[1;31m# keep undecoded input until the next call\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    323\u001b[0m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbuffer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mconsumed\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mUnicodeDecodeError\u001b[0m: 'utf-8' codec can't decode byte 0x92 in position 884: invalid start byte"
     ]
    }
   ],
   "source": [
    "bayes.spamTest()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\anaconda\\lib\\re.py:212: FutureWarning: split() requires a non-empty pattern match.\n",
      "  return _compile(pattern, flags).split(string, maxsplit)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['peter',\n",
       " 'with',\n",
       " 'jose',\n",
       " 'out',\n",
       " 'town',\n",
       " 'you',\n",
       " 'want',\n",
       " 'meet',\n",
       " 'once',\n",
       " 'while',\n",
       " 'keep',\n",
       " 'things',\n",
       " 'going',\n",
       " 'and',\n",
       " 'some',\n",
       " 'interesting',\n",
       " 'stuff',\n",
       " 'let',\n",
       " 'know',\n",
       " 'eugene']"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bayes.textParse(open('email/ham/1.txt').read())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
